*This lab aims to explore the fMNIST dataset thoroughly, build and train a neural network using TensorFlow, assess its performance with sklearn, and draw clear conclusions based on the findings.*
# %pip install numpy
# %pip install matplotlib
# %pip install plotly
# %pip install pandas
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly
plotly.offline.init_notebook_mode()
# import tensorflow as tf
# from sklearn.metrics import confusion_matrix
# import matplotlib.pyplot as plt
# from sklearn.model_selection import train_test_split
# from sklearn.metrics import classification_report
# from tensorflow.python import keras
# from tensorflow.python.keras.models import Sequential
# from keras.layers import Dense, Conv2D, Activation, MaxPool2D, Flatten, Dropout, BatchNormalization
# from keras.optimizers import RMSprop,Adam
# from tensorflow.keras.preprocessing.image import ImageDataGenerator
# from keras.utils import plot_model
# import math
# from keras.optimizers import RMSprop
# import pickle
# Load the Fashion-MNIST splits from the local data directory.
train = pd.read_csv("./data/fashion-mnist_train.csv")
test = pd.read_csv("./data/fashion-mnist_test.csv")
test.head()
| label | pixel1 | pixel2 | pixel3 | pixel4 | pixel5 | pixel6 | pixel7 | pixel8 | pixel9 | ... | pixel775 | pixel776 | pixel777 | pixel778 | pixel779 | pixel780 | pixel781 | pixel782 | pixel783 | pixel784 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9 | 8 | ... | 103 | 87 | 56 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 34 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 14 | 53 | 99 | ... | 0 | 0 | 0 | 0 | 63 | 53 | 31 | 0 | 0 | 0 |
| 3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 137 | 126 | 140 | 0 | 133 | 224 | 222 | 56 | 0 | 0 |
| 4 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5 rows × 785 columns
test.head()
| label | pixel1 | pixel2 | pixel3 | pixel4 | pixel5 | pixel6 | pixel7 | pixel8 | pixel9 | ... | pixel775 | pixel776 | pixel777 | pixel778 | pixel779 | pixel780 | pixel781 | pixel782 | pixel783 | pixel784 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9 | 8 | ... | 103 | 87 | 56 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 34 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 14 | 53 | 99 | ... | 0 | 0 | 0 | 0 | 63 | 53 | 31 | 0 | 0 | 0 |
| 3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 137 | 126 | 140 | 0 | 133 | 224 | 222 | 56 | 0 | 0 |
| 4 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5 rows × 785 columns
# Report the dimensions (rows, columns) of each split.
for split_name, frame in (("train", train), ("test", test)):
    print("Fashion MNIST", split_name, "- rows:", frame.shape[0], " columns:", frame.shape[1])
Fashion MNIST train - rows: 60000 columns: 785 Fashion MNIST test - rows: 10000 columns: 785
test.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 10000 entries, 0 to 9999 Columns: 785 entries, label to pixel784 dtypes: int64(785) memory usage: 59.9 MB
train.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 60000 entries, 0 to 59999 Columns: 785 entries, label to pixel784 dtypes: int64(785) memory usage: 359.3 MB
*Both the training and testing datasets are composed of integer values, indicating that the labels are also represented by numerical values.*
def _split_features_labels(frame):
    """Split a Fashion-MNIST frame into images and labels.

    Column 0 holds the class label; the remaining 784 columns are the
    28x28 grayscale pixels, reshaped to (n, 28, 28, 1).
    """
    # NOTE(review): pixels stay in the raw 0-255 range — no scaling to
    # [0, 1] is applied before training; confirm this is intentional.
    pixels = frame.iloc[:, 1:].values.reshape(-1, 28, 28, 1)
    labels = frame.iloc[:, 0].values.reshape(-1, 1)
    return pixels, labels

X_train, y_train = _split_features_labels(train)
X_test, y_test = _split_features_labels(test)
# Human-readable names for the ten Fashion-MNIST label ids (0-9).
apparel_items = dict(enumerate([
    'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
    'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot',
]))
# Show a 3x4 grid of randomly drawn training images with their class names.
fig, axes = plt.subplots(3, 4, figsize=(5, 5))
pixel_matrix = train.drop('label', axis=1).values
for axe in axes.ravel():
    idx = np.random.randint(60000)
    axe.imshow(pixel_matrix[idx].reshape(28, 28))
    axe.set_title(apparel_items[train['label'][idx]])
    axe.set_axis_off()
import numpy as np
import matplotlib.pyplot as plt

# One panel per class: the pixel-wise mean of all training images of that class.
fig, axes = plt.subplots(1, 10, figsize=(20, 2))

# Class names in label-id order, per the apparel_items mapping.
ordered_digits = [apparel_items[i] for i in range(10)]

for ax, digit in zip(axes, range(10)):
    # Row indices of every training example belonging to this class.
    class_rows = np.where(y_train.astype('int8') == digit)[0]
    # Average those images into a single 28x28 "prototype" picture.
    mean_image = X_train[class_rows].mean(axis=0).reshape(28, 28)
    ax.imshow(mean_image)
    ax.set_title(ordered_digits[digit])
    ax.axis('off')

plt.tight_layout()
plt.show()
The Sandal and Bag categories display more pixel variation across different positions compared to others. This variability may pose prediction challenges for the model.
def sample_images_data(data):
    """Collect the first four images of every apparel class.

    `data` is a DataFrame whose "label" column holds the class id and whose
    remaining 784 columns hold pixel values. Returns a (images, labels)
    pair, where each image is a 28x28 numpy array.
    """
    sample_images = []
    sample_labels = []
    for label_id in apparel_items.keys():
        # Take the first four rows belonging to this class.
        class_rows = data[data["label"] == label_id].head(4)
        for row_pos in range(len(class_rows)):
            # Column 0 is the label, so the image starts at column 1.
            sample_images.append(
                np.array(class_rows.iloc[row_pos, 1:]).reshape(28, 28))
            sample_labels.append(class_rows.iloc[row_pos, 0])
    print("Total number of sample images to plot: ", len(sample_images))
    return sample_images, sample_labels
train_sample_images, train_sample_labels = sample_images_data(train)
def plot_sample_images(data_sample_images, data_sample_labels, cmap="Blues"):
    """Draw the collected samples on a 5x8 grid, titled with class names."""
    f, ax = plt.subplots(5, 8, figsize=(16, 10))
    for i, img in enumerate(data_sample_images):
        panel = ax[i // 8, i % 8]
        panel.imshow(img, cmap=cmap)
        panel.axis('off')
        panel.set_title(apparel_items[data_sample_labels[i]])
    plt.show()

plot_sample_images(train_sample_images, train_sample_labels, "Greens")
Total number of sample images to plot: 40
import plotly.graph_objects as go

# Flatten the (n, 1) label column to a 1-D int vector so np.bincount works.
y_train = np.array(y_train).flatten().astype(np.int8)

# Number of training samples per class id.
class_counts = np.bincount(y_train)

# Class ids and their display names.
# Fix: the original rebound `apparel_items` itself to a list here, silently
# turning the id->name dict into a list for the rest of the notebook. Keep
# the dict intact and give the ordered name list its own variable.
class_labels = list(apparel_items.keys())
apparel_names = list(apparel_items.values())

# One fixed color per class.
colors = ['rgb(31, 119, 180)', 'rgb(255, 127, 14)', 'rgb(44, 160, 44)', 'rgb(214, 39, 40)',
          'rgb(148, 103, 189)', 'rgb(140, 86, 75)', 'rgb(227, 119, 194)', 'rgb(127, 127, 127)',
          'rgb(188, 189, 34)', 'rgb(23, 190, 207)']

# Bar chart of the class distribution.
fig = go.Figure(data=[go.Bar(
    x=class_labels,
    y=class_counts,
    text=apparel_names,  # Adding text for hover
    marker_color=colors
)])

# Label the axes with class names instead of bare ids.
fig.update_layout(
    title='Number of samples per label',
    xaxis=dict(
        title='Class',
        tickmode='array',
        tickvals=class_labels,
        ticktext=apparel_names,
        tickangle=-45
    ),
    yaxis=dict(title='Count')
)

fig.show()
As we can see, there is no class imbalance in the training dataset: every class has the same number of examples.
import matplotlib.pyplot as plt

# How are raw pixel intensities (0-255) distributed across the training set?
plt.figure(figsize=(6, 5))
all_pixels = X_train.flatten()
plt.hist(all_pixels, bins=50, edgecolor='yellow', color='red')
plt.ylabel('Frequency')
plt.xlabel('Pixel Value')
plt.title('Distribution of Pixel Values in our dataset')
plt.show()
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
from sklearn.model_selection import train_test_split

# Carve the provided test split in half: one half is used for validation
# during training, the other half is the final held-out test set.
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=42)

# Baseline classifier: flatten the image, one hidden ReLU layer, softmax head.
model = keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax'),
])
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
flatten (Flatten) (None, 784) 0
dense (Dense) (None, 128) 100480
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 101,770
Trainable params: 101,770
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
flatten (Flatten) (None, 784) 0
dense (Dense) (None, 128) 100480
dense_1 (Dense) (None, 10) 1290
=================================================================
Total params: 101,770
Trainable params: 101,770
Non-trainable params: 0
_________________________________________________________________
Model summary :
# Compile the model: integer class labels -> sparse categorical
# cross-entropy; Adam optimizer; track accuracy.
# NOTE(review): create_model() below compiles its own fresh models, so this
# call only affects the summary model defined above.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.callbacks import EarlyStopping

# Define the model
def create_model():
    """Return a freshly built and compiled baseline MLP for Fashion-MNIST.

    Flatten(28x28) -> Dense(128, relu) -> Dense(10, softmax); compiled with
    Adam and sparse categorical cross-entropy (labels are integer ids).
    """
    model = Sequential([
        Flatten(input_shape=(28, 28)),  # Assuming input shape is 28x28 for Fashion MNIST
        Dense(128, activation='relu'),
        Dense(10, activation='softmax')  # Assuming 10 classes for Fashion MNIST
    ])
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# Initialize variables for best model selection
best_model = None
best_val_loss = float('inf')
best_val_accuracy = 0
best_epochs = 0
best_batch_size = 0

# Define a list of epochs and batch sizes to try
# (only one configuration here, but the grid generalizes to more values)
epochs_list = [10]
batch_sizes = [128]

# Iterate over epochs and batch sizes
for epochs in epochs_list:
    for batch_size in batch_sizes:
        # Fresh weights for every configuration so runs are comparable.
        model = create_model()

        # Early stopping: quit after 3 epochs without val_loss improvement
        # and roll back to the best weights seen so far.
        early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

        # Train the model
        history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
                            validation_data=(X_val, y_val), callbacks=[early_stopping], verbose=0)

        # Best-epoch validation loss/accuracy over the whole run
        val_loss = min(history.history['val_loss'])
        val_accuracy = max(history.history['val_accuracy'])
        print(f"Epochs: {epochs}, Batch Size: {batch_size}, Validation Loss: {val_loss}, Validation Accuracy: {val_accuracy}")

        # Evaluate the model on the validation set
        # (restore_best_weights=True means this uses the best epoch's weights)
        val_loss, val_accuracy = model.evaluate(X_val, y_val)
        print('Validation Accuracy:', val_accuracy)
        print('Validation Loss:', val_loss)

        # Check if this model has the best validation loss so far
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_val_accuracy = val_accuracy
            best_model = model
            best_epochs = epochs
            best_batch_size = batch_size

print(f"\nBest model chosen based on validation loss is with size: {best_batch_size}, epochs: {best_epochs}")
print(f"Best Validation Loss: {best_val_loss}, Best Validation Accuracy: {best_val_accuracy}")
Epochs: 10, Batch Size: 128, Validation Loss: 0.4656331539154053, Validation Accuracy: 0.8521999716758728 157/157 [==============================] - 0s 2ms/step - loss: 0.4656 - accuracy: 0.8522 Validation Accuracy: 0.8521999716758728 Validation Loss: 0.46563300490379333 Best model chosen based on validation loss is with size: 128, epochs: 10 Best Validation Loss: 0.46563300490379333, Best Validation Accuracy: 0.8521999716758728
# Final evaluation on the held-out test half.
# Fix: the original evaluated (X_val, y_val) here while labelling the
# numbers "Test" — a copy-paste slip that just repeated the validation
# scores. Evaluate the actual test split instead.
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print('Test Accuracy:', test_accuracy)
print('Test Loss:', test_loss)
157/157 [==============================] - 0s 2ms/step - loss: 0.4656 - accuracy: 0.8522 Test Accuracy: 0.8521999716758728 Test Loss: 0.46563300490379333
# Pull the per-epoch loss curves out of the last training run.
metrics = history.history
training_loss_list = metrics['loss']
test_loss_list = metrics['val_loss']

import plotly.graph_objects as go

# One line per curve: training loss vs validation loss, indexed by epoch.
loss_traces = [
    go.Scatter(x=list(range(len(training_loss_list))), y=training_loss_list,
               mode='lines', name='Training Loss'),
    go.Scatter(x=list(range(len(test_loss_list))), y=test_loss_list,
               mode='lines', name='Validation Loss'),
]

fig = go.Figure(
    data=loss_traces,
    layout=dict(title='Training and Validation Loss Over Epochs',
                xaxis=dict(title='Epoch'),
                yaxis=dict(title='Loss')),
)
fig.show()
There is no significant improvement after epoch 2; the loss keeps decreasing only slightly while consuming more computational resources, so we could reduce training to 2 epochs.
Training Loss (Blue Line):
Validation Loss (Red Line):
Conclusion:
# Per-epoch accuracy curves from the same training history.
train_accuracy_list = metrics['accuracy']
test_accuracy_list = metrics['val_accuracy']

import plotly.graph_objects as go

training_accuracy_list = metrics['accuracy']
test_accuracy_list = metrics['val_accuracy']

# One line per curve: training vs validation accuracy, indexed by epoch.
accuracy_traces = [
    go.Scatter(x=list(range(len(training_accuracy_list))), y=training_accuracy_list,
               mode='lines', name='Training Accuracy'),
    go.Scatter(x=list(range(len(test_accuracy_list))), y=test_accuracy_list,
               mode='lines', name='Validation Accuracy'),
]

fig = go.Figure(
    data=accuracy_traces,
    layout=dict(title='Training and Validation Accuracy Over Epochs',
                xaxis=dict(title='Epoch'),
                yaxis=dict(title='Accuracy')),
)
fig.show()
Training Accuracy (Blue Line):
Validation Accuracy (Orange Line):
Conclusion
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Predict class probabilities for the validation set and take the argmax
# over the 10 softmax outputs to get hard label predictions.
predictions = model.predict(X_val)
predicted_labels = np.argmax(predictions, axis=1)

# Fix: the predictions come from X_val, so they must be scored against
# y_val. The original scored them against y_test — a different, unaligned
# label set — which is why all four metrics collapsed to ~0.098, i.e.
# chance level for 10 classes.
accuracy = accuracy_score(y_val, predicted_labels)
precision = precision_score(y_val, predicted_labels, average='weighted')
recall = recall_score(y_val, predicted_labels, average='weighted')
f1 = f1_score(y_val, predicted_labels, average='weighted')

# Display the four metrics as a one-row table.
metrics_df = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Value': [accuracy, precision, recall, f1]
}).set_index('Metric').transpose()
metrics_df
157/157 [==============================] - 0s 2ms/step
| Metric | Accuracy | Precision | Recall | F1 Score |
|---|---|---|---|---|
| Value | 0.0978 | 0.097882 | 0.0978 | 0.097662 |
Accuracy (0.0978):
Precision (0.097882):
Recall (0.0978):
F1 Score (0.097662):
import matplotlib.pyplot as plt

# Predict over the whole validation set, then display one of its images.
predictions = model.predict(X_val)

index = 250  # arbitrary validation example to visualize
plt.figure(figsize=(4, 4))
plt.imshow(X_val[index])
plt.axis('off')
plt.show()
157/157 [==============================] - 0s 2ms/step
print("Prediction:", apparel_items[np.argmax(predictions[index])])
Prediction: Coat
import random

# Spot-check 15 random validation images: predicted label vs ground truth.
random_indices = [random.randint(0, len(X_val) - 1) for _ in range(15)]

data = []
for index in random_indices:
    predicted_name = apparel_items[np.argmax(predictions[index])]
    actual_name = apparel_items[y_val[index][0]]
    # Tick when the prediction matches the true label, cross otherwise.
    data.append([predicted_name, actual_name,
                 "✔" if predicted_name == actual_name else "✖"])

# Render the comparison as a small table.
columns = ["Prediction", "Actual", "Validation"]
df = pd.DataFrame(data, columns=columns)
print(df)
Prediction Actual Validation 0 Coat Coat ✔ 1 Dress Dress ✔ 2 Pullover Coat ✖ 3 Ankle boot Ankle boot ✔ 4 Dress Dress ✔ 5 Bag Bag ✔ 6 Shirt Coat ✖ 7 Sandal Sandal ✔ 8 T-shirt/top T-shirt/top ✔ 9 Coat Coat ✔ 10 Ankle boot Ankle boot ✔ 11 T-shirt/top Shirt ✖ 12 T-shirt/top Shirt ✖ 13 Dress Dress ✔ 14 Sandal Sandal ✔
This output compares the model's predictions with the actual labels, showing where the model is correct.
from sklearn.metrics import precision_score

# Predict over the validation set and take hard labels.
predictions = model.predict(X_val)
predicted_labels = np.argmax(predictions, axis=1)

# Rows whose TRUE class is 5 (Sandal).
# Fix: slice y_val — the labels aligned with X_val — not y_test as the
# original did; those indices belong to a different split.
indices_class_5 = np.where(y_val == 5)[0]
y_val_class_5 = y_val[indices_class_5]
predicted_labels_class_5 = predicted_labels[indices_class_5]

# Actual precision for class 5: true positives over everything the model
# labelled as class 5.
true_positives = np.sum(predicted_labels_class_5 == 5)
total_predicted_positives = np.sum(predicted_labels == 5)
actual_precision_class_5 = true_positives / total_predicted_positives
print("Actual Precision for Class 5:", actual_precision_class_5)

# Set the confidence threshold for counting a class-5 prediction.
threshold = 0.7

# Fix: binarize the class-5 probability over ALL validation samples. The
# original binarized only the true-class-5 rows, making the denominator
# equal to the numerator, so the "adjusted precision" was 1.0 by
# construction and carried no information.
binarized_predictions = (predictions[:, 5] >= threshold).astype(int)
true_positives_adjusted = np.sum(binarized_predictions[indices_class_5] == 1)
adjusted_precision_class_5 = true_positives_adjusted / np.sum(binarized_predictions)
print("Adjusted Precision for Class 5 (Threshold at 0.7):", adjusted_precision_class_5)
157/157 [==============================] - 0s 1ms/step Actual Precision for Class 5: 0.9403578528827038 Adjusted Precision for Class 5 (Threshold at 0.7): 1.0
Class 5 Actual Precision:
Adjusted Precision for Class 5:
Overall
from sklearn.metrics import recall_score

# Predict over the validation set and take hard labels.
predictions = model.predict(X_val)
predicted_labels = np.argmax(predictions, axis=1)

# Rows whose TRUE class is 5 (Sandal).
# Fix: slice y_val (aligned with X_val), not y_test as the original did —
# the count happened to match, but the labels came from the wrong split.
indices_class_5 = np.where(y_val == 5)[0]
y_val_class_5 = y_val[indices_class_5]
predicted_labels_class_5 = predicted_labels[indices_class_5]

# Actual recall for class 5: true positives over all real class-5 samples.
true_positives = np.sum(predicted_labels_class_5 == 5)
total_positives = len(y_val_class_5)
actual_recall_class_5 = true_positives / total_positives
print("Actual Recall for Class 5:", actual_recall_class_5)

# Set the confidence threshold for counting a class-5 prediction.
threshold = 0.7

# Recall at the threshold: only class-5 rows whose class-5 probability
# clears 0.7 count as recalled.
binarized_predictions_class_5 = (predictions[indices_class_5, 5] >= threshold).astype(int)
true_positives_adjusted = np.sum(binarized_predictions_class_5 == 1)
adjusted_recall_class_5 = true_positives_adjusted / total_positives
print("Adjusted Recall for Class 5 (Threshold at 0.7):", adjusted_recall_class_5)
157/157 [==============================] - 0s 1ms/step Actual Recall for Class 5: 0.9220272904483431 Adjusted Recall for Class 5 (Threshold at 0.7): 0.9044834307992202
Real Recall for Class 5:
Adjusted Recall for Class 5 (Threshold @ 0.7):
Overall
Dataset Description
Model Structure
Model Working
Analysis of Loss and Accuracy
Evaluation on Precision and Recall
Glancing over Predictions
Adjusted Metrics for Precision and Recall
Overall, the model displays strong performance in fashion item classification, achieving high accuracy and effectively categorizing items across various classes.